## Load data -----------------------------------------------------------------
# All review data live in one workbook; each research question has its own sheet.
summary_path <- "Summary_TR_predictions_with_rs.xlsx"
data_sumtable <- readxl::read_excel(path = summary_path, sheet = "RQ0_table")
data_acc <- readxl::read_excel(path = summary_path, sheet = "RQ1_acc")
data_imp_features <- readxl::read_excel(path = summary_path, sheet = "RQ2_Important_features", na = "NA")
data_dimreduction <- readxl::read_excel(path = summary_path, sheet = "RQ3_reducing")
# PROBAST ratings: the real header sits in the first data row, so promote it
# and keep the 13 study rows that follow.
data_PROBAST <- readxl::read_excel(path = "PROBAST_assessment.xlsx", sheet = "Main")
colnames(data_PROBAST) <- data_PROBAST[1, ]
data_PROBAST <- data_PROBAST[2:14, ]
# Normalise column names: replace " - " first (so it collapses to a single "_"),
# then replace any remaining spaces with "_".
colnames(data_acc) <- gsub(" ", "_", colnames(data_acc))
colnames(data_imp_features) <- gsub(" - ", "_", colnames(data_imp_features))
colnames(data_imp_features) <- gsub(" ", "_", colnames(data_imp_features))
colnames(data_dimreduction) <- gsub(" ", "_", colnames(data_dimreduction))
## Step 1: Modify table entries
# Column: Responders/nonresponders
# Compact "X responders, Y nonresponders" (and the remitter wording) to "X/Y".
# Replacement order matters: the ", " separator is rewritten before the
# trailing group label is stripped.
data_sumtable$`Responders/nonresponders` <- gsub(" responders, ","/",data_sumtable$`Responders/nonresponders`)
data_sumtable$`Responders/nonresponders` <- gsub(" nonresponders","",data_sumtable$`Responders/nonresponders`)
data_sumtable$`Responders/nonresponders` <- gsub(" remitters, ","/",data_sumtable$`Responders/nonresponders`)
data_sumtable$`Responders/nonresponders` <- gsub(" nonremitters","",data_sumtable$`Responders/nonresponders`)
# Add proportion of nonresponders (exploratory draft, intentionally disabled)
# proportion_nonresponders <- character(nrow(data_sumtable))
# row_idx = 8
# for (row_idx in 1:nrow(data_sumtable)){
# strsplit(data_sumtable$`Responders/nonresponders`[row_idx], "\n")
# if
# entry
# entry <- data_sumtable$`Responders/nonresponders`[row_idx]
# responders <- strsplit(data_sumtable$`Responders/nonresponders`[row_idx], c("; \r\n"))[[1]][1]
# nonresponders <- strsplit(entry, "/")[[1]][2]
# proportion <- round(as.numeric(nonresponders)/(as.numeric(responders)+as.numeric(nonresponders))*100,0)
# entry <- paste(data_sumtable$`Responders/nonresponders`[row_idx]," (",proportion,"%)", sep = "")
# }
# Column: Treatment — abbreviate to keep the table narrow
data_sumtable$Treatment <- gsub("atypical antipsychotics","AAP",data_sumtable$Treatment)
#data_sumtable$treatment <- gsub("Alpha2-receptor-antagonists","AAP",data_sumtable$treatment)
# Column: Information on models tested
data_sumtable$`Information on models tested` <- gsub(" of models tested","",data_sumtable$`Information on models tested`)
# Column: Definition of treatment outcome — drop any parenthesised remark,
# including the space in front of the opening parenthesis
data_sumtable$`Definition of treatment outcome` <- gsub("\\s*\\([^\\)]+\\)","",data_sumtable$`Definition of treatment outcome`)
# Column: Input features — drop the "(wb: ...)" detail up to the closing paren
data_sumtable$`Type of functional-connectivity-based input features` <- gsub("[(]wb:.*)","",data_sumtable$`Type of functional-connectivity-based input features`)
# Column: FC estimation
data_sumtable$`Way of estimating the underlying functional connectivities`<- gsub("Group-information guided","Gig",data_sumtable$`Way of estimating the underlying functional connectivities`)
# Column: Algorithms
data_sumtable$`Algorithm(s) of the final classifier(s)`<- gsub("graph convolutional network","GCN",data_sumtable$`Algorithm(s) of the final classifier(s)`)
## Step 2: delete columns (Age group and Year)
data_sumtable$`Age group`<- NULL
data_sumtable$Year <- NULL
## Step 3: change column names (shorter headers for the printed table)
colnames(data_sumtable) <- gsub("Sample size","N",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Balanced_acc_final","Best Acc",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Responders/nonresponders","Responders/ nonresponders",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Definition of treatment outcome","Definition treatment outcome",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Type of functional-connectivity-based input features","Input features",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Way of estimating the underlying functional connectivities","Estimating FCs",colnames(data_sumtable))
## Step 4: turn values of numeric variables (N and Best ACC) into strings
# (character columns avoid numeric-formatting surprises in flextable)
data_sumtable$N <- as.character(data_sumtable$N)
data_sumtable$`Best Acc` <- as.character(data_sumtable$`Best Acc`)
# More info about settings in flextable: https://ardata-fr.github.io/flextable-book/cell-content.html
# Set defaults for the summary table (Table 1).
# BUG FIX: the argument was misspelled "paddings.right", so the right padding
# was silently ignored; the flextable argument is "padding.right".
set_flextable_defaults(font.family = "Arial",
                       font.size = 8,
                       padding.bottom = 3,
                       padding.top = 3,
                       padding.left = 0.5,
                       padding.right = 0.5,
                       #theme_fun = "theme_apa",
                       theme_fun = NULL,
                       text.align = "left",
                       line_spacing = 1.5)
# Initialize flex_table
apa_sumtable <- flextable(data_sumtable)
# Set table properties: full page width, auto-fitted column widths
apa_sumtable <- set_table_properties(apa_sumtable, width = 1, layout = "autofit")
# Page margins (inches) for the Word export below.
# NOTE(review): `margins` is only consumed by the commented-out export; kept
# for when the export is re-enabled.
margins <- page_mar(
  bottom = 0.5,
  top = 0.5,
  right = 0.5,
  left = 0.5,
  header = 0.5,
  footer = 0.5,
  gutter = 0.5
)
#flextable::save_as_docx(apa_sumtable, path = "plots/Table_1_characteristics.docx", pr_section = format_table_wide)
data_sumtable
The mean raw accuracy of the best model per study is 80% (range: 61%–90%); the mean balanced accuracy is 78% (range: 58%–89%).
# Classify each study's risk of data leakage using PROBAST item 4.8
# ("Were model overfitting and optimism in model performance accounted for?").
q48 <- "4.8 Were model overfitting and optimism in model performance accounted for?"
studies_low_ROB <- data_PROBAST[data_PROBAST[[q48]] == "Y", c("Study")][["Study"]]
data_acc$ROB <- ifelse(data_acc$Study %in% studies_low_ROB,
                       "low risk of data leakage",
                       "high risk of data leakage")
# Reshape data_acc to long format: one row per study x accuracy metric.
acc_cols <- c("Reported_Accuracy_rounded", "Balanced_acc_final")
data_acc_long <- reshape(data = data_acc, idvar = "Study",
                         varying = acc_cols, v.name = "metric",
                         times = acc_cols, new.row.names = 1:1000,
                         direction = "long")
#data_acc_long$time <- as.factor(data_acc_long$time)
# Fix the level order so "balanced accuracy" comes first in legends.
data_acc_long$time <- factor(data_acc_long$time,
                             levels = c("Balanced_acc_final", "Reported_Accuracy_rounded"),
                             labels = c("balanced accuracy", "raw accuracy"))
### Plot raw and balanced acc
# Dumbbell-style plot: one line per study connecting raw and balanced accuracy.
# Remove studies that did not need a new calculation (balanced accuracy was
# reported directly, so both values coincide and plotting both is redundant).
studies_reported <- c(data_acc[data_acc$Technique_of_calculating_balanced_acc == "reported","Study"])$Study
data_acc_long <- data_acc_long[!(data_acc_long$Study %in% studies_reported), ]
# Mark studies where we used the proxy of balanced acc with a "*" suffix
studies_proxy <- c(data_acc[data_acc$Technique_of_calculating_balanced_acc == "proxy",c("Study")])$Study
for (study_name in studies_proxy) {
  data_acc_long$Study[data_acc_long$Study == study_name] <- paste0(data_acc_long$Study[data_acc_long$Study == study_name], "*")
}
# Plot geometry settings
size_circle <- 3
size_half <- size_circle * 1.5   # half-circle glyphs are drawn as text, hence larger
pos_half_vert <- 0.4  # The higher, the lower the point
pos_half_horiz <- 0.25 # The higher, the more left
data_acc_long$Study <- factor(data_acc_long$Study, levels = rev(unique(data_acc_long$Study)))
# BUG FIX: removed dead `brewer.pal(2, "Oranges")` call — brewer.pal warns
# ("minimal value for n is 3") and its result was immediately overwritten.
color_range <- colorRampPalette(brewer.pal(9, "Oranges"))(100)
# Choose two values that are far from each other
color1 <- color_range[30] # Adjust index as needed
color2 <- color_range[70]
colour_palette <- c(color1, color2)
# Studies where raw == balanced accuracy (Drysdale, Pei) get a single point
# drawn as two half-circle unicode glyphs, one per colour.
plot_raw_bal_acc <- ggplot(data_acc_long, aes(x = Study, y = metric, color = time, group = Study)) +
  geom_point(size = size_circle) +
  geom_line(color = "black", size = 0.3) +
  scale_color_manual(values = colour_palette) +
  geom_text(data = data_acc[data_acc$Study %in% c("Drysdale, 2017","Pei, 2020"),],
            label = "\u25D7",
            aes(`Study`, `Balanced_acc_final`,colour = "raw accuracy"),
            size = size_half,
            hjust = pos_half_horiz,
            vjust = pos_half_vert,
            family = "Lucida Sans Unicode",
            key_glyph = draw_key_blank) +
  geom_text(data = data_acc[data_acc$Study %in% c("Drysdale, 2017","Pei, 2020"),],
            label = "\u25D6",
            aes(`Study`, `Balanced_acc_final`,colour = "balanced accuracy"),
            size = size_half,
            hjust = pos_half_horiz + 0.57,
            vjust = pos_half_vert,
            family = "Lucida Sans Unicode",
            key_glyph = draw_key_blank) +
  labs(x = "", y = "Accuracy in %") +
  theme(legend.position = "bottom") +
  #theme(x.axis.text = element_text(size = size_text-1, family = "Helvetica"))+
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
  coord_flip()
ggsave("plots/plot_raw_bal_acc.svg", plot_raw_bal_acc, height = 4)
plot_raw_bal_acc
# Attach the primary disorder of each study to the accuracy data.
data_acc_meta <- merge(data_acc, data_sumtable[,c("Study","Primary disorder")])
# Keep only the main diagnosis label (drop "& BPD" and "(partial) " qualifiers).
data_acc_meta$`Primary disorder` <- gsub(" & BPD","",data_acc_meta$`Primary disorder`)
data_acc_meta$`Primary disorder` <- gsub("\\(partial\\) ","",data_acc_meta$`Primary disorder`)
data_acc_meta$`Primary disorder`
## [1] "MDD" "MDD" "MDD" "MDD" "MDD" "MDD" "MDD" "MDD" "MDD" "MDD"
## [11] "MDD" "PTSD" "PTSD"
colnames(data_acc_meta) <- gsub(" ","_",colnames(data_acc_meta))
# Re-derive the data-leakage flag for the merged data set (PROBAST item 4.8).
studies_low_ROB <- c(data_PROBAST[data_PROBAST$`4.8 Were model overfitting and optimism in model performance accounted for?` == "Y",c("Study")])$Study
data_acc_meta$ROB <- ifelse(data_acc_meta$Study %in% studies_low_ROB, "low risk of data leakage", "high risk of data leakage")
library(meta)
library(metafor)
# Number of correctly classified cases per study, reconstructed from the
# balanced accuracy (in %) and the sample size; used as the event count below.
data_acc_meta$n_correct_cases <- round(data_acc_meta$Balanced_acc_final * 0.01 * data_acc_meta$Sample_size,0)
# NOTE(review): treatments are assigned by row position after merge(); this
# silently breaks if merge() ever changes the row order — safer to join by Study.
data_acc_meta$Treatment <- c("rTMS","medication","rTMS","medication","ECT","medication","medication and psychotherapy","ECT","medication","ECT","medication","psychotherapy","psychotherapy")
# Pool proportions (correct classifications / sample size) across studies
m.prop <- metaprop(event = n_correct_cases,
n = Sample_size,
studlab = Study, # column for study name
data = data_acc_meta,
method = "Inverse", # Inverse variance method
method.ci = "CP", # Clopper-Pearson interval for each study's CI
method.tau = "REML", # restricted maximum likelihood estimator for tau
sm = "PFT", # Freeman-Tukey double arcsine transformation
fixed = FALSE,
random = TRUE, # random-effects model only
hakn = TRUE, # Hartung-Knapp adjustment for calculating the CI around the pooled effect size
title = "Predicting treatment response")
summary(m.prop)
## Review: Predicting treatment response
##
## proportion 95%-CI %W(random)
## Drysdale, 2017 0.7823 [0.6992; 0.8513] 9.3
## Harris, 2022 0.5833 [0.4983; 0.6648] 9.5
## Hopman, 2021 0.8525 [0.7383; 0.9302] 7.8
## Kong, 2021 0.8902 [0.8018; 0.9486] 8.5
## Moreno-Ortega, 2019 0.8889 [0.6529; 0.9862] 4.5
## Pei, 2020 0.8061 [0.7139; 0.8790] 8.8
## Schultz, 2018 0.7143 [0.4782; 0.8872] 4.9
## Sun, 2020 0.6721 [0.5813; 0.7544] 9.2
## Tian, 2020 0.6792 [0.5816; 0.7666] 9.0
## van Waarde, 2015 0.8444 [0.7054; 0.9351] 7.0
## Wu, 2022 0.8060 [0.6911; 0.8924] 8.0
## Zhutovsky, 2019 0.8182 [0.6729; 0.9181] 6.9
## Zhutovsky, 2021 0.7500 [0.5880; 0.8731] 6.7
##
## Number of studies: k = 13
## Number of observations: o = 972
## Number of events: e = 728
##
## proportion 95%-CI
## Random effects model 0.7748 [0.7164; 0.8284]
##
## Quantifying heterogeneity:
## tau^2 = 0.0087 [0.0026; 0.0264]; tau = 0.0933 [0.0508; 0.1625]
## I^2 = 74.7% [56.4%; 85.3%]; H = 1.99 [1.51; 2.61]
##
## Test of heterogeneity:
## Q d.f. p-value
## 47.45 12 < 0.0001
##
## Details on meta-analytical method:
## - Inverse variance method
## - Restricted maximum-likelihood estimator for tau^2
## - Q-Profile method for confidence interval of tau^2 and tau
## - Hartung-Knapp adjustment for random effects model (df = 12)
## - Freeman-Tukey double arcsine transformation
## - Clopper-Pearson confidence interval for individual studies
# Forest plot, written straight to PDF (cairo embeds the fonts)
#svg(file='plots/forestplot.svg', width = 8, height = 5)
cairo_pdf("plots/figure_3_forestplot.pdf")
forest_plot <- forest(m.prop,
sortvar = TE, # sort studies by effect size
fontsize = 12,
prediction = FALSE, # Do not show the prediction interval
print.tau2 = FALSE,
rightlabs = c("Balanced\naccuracy", "95%-CI", "Weight")
#leftlabs = c("Study",)
)
dev.off()
## png
## 2
Please note that we did not report the publication bias in our
meta-analysis as this is not recommended in the case of high
heterogeneity.
## Review: Predicting treatment response
##
## Linear regression test of funnel plot asymmetry
##
## Test result: t = 1.72, df = 11, p-value = 0.1134
##
## Sample estimates:
## bias se.bias intercept se.intercept
## 3.1678 1.8416 0.8735 0.1061
##
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 3.3991)
## - predictor: standard error
## - weight: inverse variance
## - reference: Egger et al. (1997), BMJ
## Review: Predicting treatment response
##
## Linear regression test of funnel plot asymmetry
##
## Test result: t = 1.41, df = 11, p-value = 0.1876
##
## Sample estimates:
## bias se.bias intercept se.intercept
## 4.7621 3.3891 0.9615 0.0516
##
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 0.1547)
## - predictor: inverse of total sample size
## - weight: inverse variance of average event probability
## - reference: Peters et al. (2006), JAMA
# Subgroup analysis by treatment type (separate tau^2 per subgroup)
update(m.prop,
subgroup = Treatment,
tau.common = FALSE)
## Review: Predicting treatment response
##
## Number of studies: k = 13
## Number of observations: o = 972
## Number of events: e = 728
##
## proportion 95%-CI
## Random effects model 0.7748 [0.7164; 0.8284]
##
## Quantifying heterogeneity:
## tau^2 = 0.0087 [0.0026; 0.0264]; tau = 0.0933 [0.0508; 0.1625]
## I^2 = 74.7% [56.4%; 85.3%]; H = 1.99 [1.51; 2.61]
##
## Test of heterogeneity:
## Q d.f. p-value
## 47.45 12 < 0.0001
##
## Results for subgroups (random effects model):
## k proportion 95%-CI tau^2
## Treatment = rTMS 2 0.8089 [0.3054; 1.0000] 0.0007
## Treatment = medication 5 0.7578 [0.5938; 0.8907] 0.0174
## Treatment = ECT 3 0.7891 [0.4715; 0.9862] 0.0130
## Treatment = medication and psychotherapy 1 0.7143 [0.4996; 0.8908] --
## Treatment = psychotherapy 2 0.7867 [0.2848; 1.0000] 0
## tau Q I^2
## Treatment = rTMS 0.0259 1.22 18.1%
## Treatment = medication 0.1317 33.68 88.1%
## Treatment = ECT 0.1140 7.45 73.2%
## Treatment = medication and psychotherapy -- 0.00 --
## Treatment = psychotherapy 0 0.56 0.0%
##
## Test for subgroup differences (random effects model):
## Q d.f. p-value
## Between groups 1.42 4 0.8409
##
## Details on meta-analytical method:
## - Inverse variance method
## - Restricted maximum-likelihood estimator for tau^2
## - Q-Profile method for confidence interval of tau^2 and tau
## - Hartung-Knapp adjustment for random effects model (df = 12)
## - Freeman-Tukey double arcsine transformation
# Subgroup analysis by primary disorder (separate tau^2 per subgroup)
update(m.prop,
subgroup = Primary_disorder,
tau.common = FALSE)
## Review: Predicting treatment response
##
## Number of studies: k = 13
## Number of observations: o = 972
## Number of events: e = 728
##
## proportion 95%-CI
## Random effects model 0.7748 [0.7164; 0.8284]
##
## Quantifying heterogeneity:
## tau^2 = 0.0087 [0.0026; 0.0264]; tau = 0.0933 [0.0508; 0.1625]
## I^2 = 74.7% [56.4%; 85.3%]; H = 1.99 [1.51; 2.61]
##
## Test of heterogeneity:
## Q d.f. p-value
## 47.45 12 < 0.0001
##
## Results for subgroups (random effects model):
## k proportion 95%-CI tau^2 tau Q
## Primary_disorder = MDD 11 0.7738 [0.7039; 0.8371] 0.0104 0.1017 46.47
## Primary_disorder = PTSD 2 0.7867 [0.2848; 1.0000] 0 0 0.56
## I^2
## Primary_disorder = MDD 78.5%
## Primary_disorder = PTSD 0.0%
##
## Test for subgroup differences (random effects model):
## Q d.f. p-value
## Between groups 0.06 1 0.7988
##
## Details on meta-analytical method:
## - Inverse variance method
## - Restricted maximum-likelihood estimator for tau^2
## - Q-Profile method for confidence interval of tau^2 and tau
## - Hartung-Knapp adjustment for random effects model (df = 12)
## - Freeman-Tukey double arcsine transformation
# Subgroup analysis by risk of data leakage (separate tau^2 per subgroup)
update(m.prop,
subgroup = ROB,
tau.common = FALSE)
## Review: Predicting treatment response
##
## Number of studies: k = 13
## Number of observations: o = 972
## Number of events: e = 728
##
## proportion 95%-CI
## Random effects model 0.7748 [0.7164; 0.8284]
##
## Quantifying heterogeneity:
## tau^2 = 0.0087 [0.0026; 0.0264]; tau = 0.0933 [0.0508; 0.1625]
## I^2 = 74.7% [56.4%; 85.3%]; H = 1.99 [1.51; 2.61]
##
## Test of heterogeneity:
## Q d.f. p-value
## 47.45 12 < 0.0001
##
## Results for subgroups (random effects model):
## k proportion 95%-CI tau^2 tau
## ROB = low risk of data leakage 9 0.7619 [0.6852; 0.8312] 0.0095 0.0976
## ROB = high risk of data leakage 4 0.8071 [0.6459; 0.9301] 0.0089 0.0944
## Q I^2
## ROB = low risk of data leakage 37.06 78.4%
## ROB = high risk of data leakage 9.49 68.4%
##
## Test for subgroup differences (random effects model):
## Q d.f. p-value
## Between groups 0.58 1 0.4459
##
## Details on meta-analytical method:
## - Inverse variance method
## - Restricted maximum-likelihood estimator for tau^2
## - Q-Profile method for confidence interval of tau^2 and tau
## - Hartung-Knapp adjustment for random effects model (df = 12)
## - Freeman-Tukey double arcsine transformation
Treatment, primary disorder, and risk of data leakage cannot explain the between-study heterogeneity.
# Meta-regression: does sample size moderate the pooled accuracy?
m.prop.reg <- metareg(m.prop, ~Sample_size, hakn = TRUE)
# Try different types of bubble plots
bubble(m.prop.reg, studlab = TRUE)
# Retransform accuracy values back from the transformed scale
# (inverse logit vs. inverse Freeman-Tukey with harmonic-mean sample size)
regplot(m.prop.reg, transf = transf.ilogit, mod = "Sample_size")
regplot(m.prop.reg, transf = transf.ipft.hm, mod = "Sample_size", targ = list(ni=m.prop.reg[["data"]][[".n"]]))
m.prop.reg
##
## Mixed-Effects Model (k = 13; tau^2 estimator: REML)
##
## tau^2 (estimated amount of residual heterogeneity): 0.0047 (SE = 0.0036)
## tau (square root of estimated tau^2 value): 0.0685
## I^2 (residual heterogeneity / unaccounted variability): 57.79%
## H^2 (unaccounted variability / sampling variability): 2.37
## R^2 (amount of heterogeneity accounted for): 46.04%
##
## Test for Residual Heterogeneity:
## QE(df = 11) = 26.1344, p-val = 0.0062
##
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 11) = 6.3943, p-val = 0.0280
##
## Model Results:
##
## estimate se tval df pval ci.lb ci.ub
## intrcpt 1.2096 0.0620 19.5100 11 <.0001 1.0731 1.3460 ***
## Sample_size -0.0017 0.0007 -2.5287 11 0.0280 -0.0031 -0.0002 *
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The sample size has a substantial effect on the accuracy.
The mean sample size was N = 75, with a range of [18, 144].
# Add weight (inverse-variance weight of each study in the meta-regression)
m.prop.reg$data$weight <- weights(m.prop.reg)
# Calculate predicted values to retransform the regression based on transformed
# proportions back to untransformed proportions
mod_values <- seq(min(m.prop.reg$data$Sample_size), max(m.prop.reg$data$Sample_size), length.out = 100)
# BUG FIX: `newmods = (Sample_size = mod_values)` performed a stray assignment,
# creating a global `Sample_size` variable as a side effect; predict.rma takes
# the moderator values as a plain vector.
predicted_values <- predict(m.prop.reg, newmods = mod_values, transf = transf.ipft.hm, targ = list(ni = m.prop.reg[["data"]][[".n"]]))
# Add column for type of treatment manually to data_acc
# NOTE(review): assigned by row position; breaks silently if row order changes.
data_acc$Treatment <- c("rTMS","medication","rTMS","medication","ECT","medication","medication and psychotherapy","ECT","medication","ECT","medication","psychotherapy","psychotherapy")
# Plot settings
pos_half_vert <- 0.3 # The higher, the lower the point
pos_half_horiz <- 0.2 # The higher, the more left
range_min <- 3
range_max <- 8
size_half <- range_min + 21 * ((range_max-range_min)/(144-18)) + 1.7 # 21 = sample size of Schultz, 144-18 = range of sample size
m.prop.reg$data$weight
## [1] 9.948465 10.378650 7.612999 8.632395 3.663075 9.219671 4.086171
## [8] 9.900061 9.469186 6.545225 7.940444 6.466879 6.136779
# Plot sample size and balanced accuracy, point size = meta-analytic weight.
# BUG FIX: size was mapped via `m.prop.reg$data$weight` inside aes(); mapping
# the column by name keeps the aesthetic aligned with the plot data.
# NOTE(review): `color_pal` is defined elsewhere in the project — confirm.
plot_acc_contr_sample_size <- ggplot(m.prop.reg$data, aes(x = Sample_size, y = Balanced_acc_final, color = Treatment)) +
  theme(text = element_text(family = "Arial")) +
  geom_point(aes(size = weight)) +
  scale_size(range = c(range_min,range_max),guide = "none") +
  # Meta-regression slope, back-transformed to the proportion scale (x 100 for %)
  geom_line(data = data.frame(Sample_size = mod_values, yi = predicted_values), aes(x = Sample_size, y = 100 *(yi.pred)), color = "black", size = 0.5) +
  # The one combined-treatment study is drawn as two half-circle glyphs,
  # one in the medication colour and one in the psychotherapy colour.
  geom_text(data = data_acc[data_acc$Treatment == "medication and psychotherapy",],
            label = "\u25D7",
            aes(`Sample_size`, `Balanced_acc_final`,colour = "medication"),
            size= size_half,
            hjust = pos_half_horiz,
            vjust = pos_half_vert,
            family = "Lucida Sans Unicode",
            key_glyph = draw_key_blank) +
  geom_text(data = data_acc[data_acc$Treatment == "medication and psychotherapy",],
            label = "\u25D6",
            aes(`Sample_size`, `Balanced_acc_final`,colour = "psychotherapy"),
            size= size_half,
            hjust = pos_half_horiz + 0.55,
            vjust= pos_half_vert,
            family = "Lucida Sans Unicode",
            key_glyph = draw_key_blank) +
  scale_color_manual(
    breaks = c("medication", "ECT","psychotherapy","rTMS"),
    values = c("ECT" = color_pal[1],
               "medication" = color_pal[2],
               "medication and psychotherapy" = "grey",
               "psychotherapy" = color_pal[3],
               "rTMS" = color_pal[4])
  ) +
  labs(y = "Balanced accuracy in %", x = "Sample size") +
  scale_x_continuous(limits=c(0,147),
                     breaks = c(0,25,50,75,100,125,150)) +
  theme(legend.position = "bottom") +
  theme(legend.title=element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE))
ggsave("plots/plot_samplesize.svg",plot_acc_contr_sample_size, height = 4)
plot_acc_contr_sample_size
# Combine both plots (Accuracy with risk of bias and Sample size and balanced accuracy)
# side by side with a thin spacer; tags A/B label the panels.
# FIX: the patchwork plot_layout() argument is `widths` — `width` only worked
# via partial argument matching.
plot_combined <- plot_raw_bal_acc + plot_spacer() + plot_acc_contr_sample_size + plot_annotation(tag_levels = "A") + plot_layout(widths = c(1,0.05,2), ncol = 3)
# Save as pdf for journal
cairo_pdf("plots/figure_2_acc_meth_sample_size.pdf", height = 3.5)
plot_combined
dev.off()
## png
## 2
ggsave("plots/figure_2_acc_meth_sample_size.svg",plot_combined, height = 3.5)
# Plot data without risk of bias
# Axis labels (German kept for reference; English used below)
labels_acc_germ <- c("berichtete Genauigkeit","für Zufallsniveau\nkontrollierte Genauigkeit")
labels_acc_eng <- c("reported\naccuracy","balanced\naccuracy")
# NOTE(review): `size_dots` is defined elsewhere in the project, not visible here.
# NOTE(review): stat_summary()'s `fun.y` is deprecated since ggplot2 3.3 (use `fun`).
plot_acc_contr_violin <- ggplot(data = data_acc_long, aes(y =`metric`, x = `time`))+
geom_violin() +
geom_quasirandom(size = size_dots) +
stat_summary(
geom = "point",
fun.y = "mean", # red triangle marks the mean
col = "black",
size = size_dots,
shape = 24,
fill = "red"
)+
labs(x="", y = "Accuracy in %")+
scale_x_discrete(labels = labels_acc_eng)
# Plot data with risk of bias (dots coloured by data-leakage rating)
plot_acc_contr_violin_rob <- ggplot(data = data_acc_long, aes(y =`metric`, x = `time`))+
geom_violin() +
geom_quasirandom(size = size_dots-0.5, aes(colour = `ROB`)) +
scale_colour_manual(name = "Risk of bias", values=c("grey69","black"))+
stat_summary(
geom = "point",
fun.y = "mean", # red triangle marks the mean
col = "black",
size = size_dots -0.5,
shape = 24,
fill = "red"
)+
labs(x="", y = "Accuracy in %")+
theme(legend.position = "bottom")+
theme(legend.title=element_blank()) +
scale_x_discrete(labels = labels_acc_eng)+
guides(color = guide_legend(nrow = 2, byrow = TRUE))
# Display and Save both plots
plot_acc_contr_violin
plot_acc_contr_violin_rob
ggsave("plots/violin_acc_normal_and_controlled.svg", plot_acc_contr_violin)
ggsave("plots/violin_acc_normal_and_controlled_rob.svg",plot_acc_contr_violin_rob, height = 4)
# Keep only the studies that reported features with high predictive value.
has_features <- !is.na(data_imp_features$Features_with_high_predictive_value)
data_imp_features_clean <- data_imp_features[has_features, ]
12 out of 13 studies made a statement on feature importance.
# Reduce to columns needed
data_imp_features_table <- data_imp_features_clean[,c("Study","Way_of_measuring_predictive_value", "Way_of_measuring_predictive_value_categories")]
colnames(data_imp_features_table)[colnames(data_imp_features_table) == "Way_of_measuring_predictive_value_categories"] <- "Category"
# Turn underscores into spaces for display
colnames(data_imp_features_table) <- gsub("_"," ",colnames(data_imp_features_table))
# Flextable
# BUG FIX: "paddings.right" was a typo and silently ignored; the flextable
# argument is "padding.right".
set_flextable_defaults(font.family = "Arial",
                       font.size = 8,
                       padding.bottom = 3,
                       padding.top = 3,
                       padding.left = 0.5,
                       padding.right = 0.5,
                       #theme_fun = "theme_apa",
                       theme_fun = NULL,
                       text.align = "center",
                       line_spacing = 1)
ft <- flextable(data_imp_features_table)
ft <- set_table_properties(ft, layout = "autofit")
ft <- autofit(ft)
# NOTE(review): format_table_portrait is defined elsewhere in the project.
flextable::save_as_docx(ft, path = "plots/RQ2.1_table.docx", pr_section = format_table_portrait)
data_imp_features_table
# Some studies used two ways of measuring predictive value (separated by ";");
# duplicate those rows so each row carries exactly one category.
data_imp_features_clean_way <- data_imp_features_clean
for (i in seq_len(nrow(data_imp_features_clean))) {
  raw_entry <- data_imp_features_clean_way$Way_of_measuring_predictive_value_categories[i]
  if (grepl(";", raw_entry)) {
    both_categories <- strsplit(raw_entry, "; ")[[1]]
    # Append a copy of the row, then assign one category to each copy:
    # the first category stays in place, the second goes to the appended row.
    data_imp_features_clean_way <- rbind(data_imp_features_clean_way,
                                         data_imp_features_clean_way[i, ])
    data_imp_features_clean_way$Way_of_measuring_predictive_value_categories[i] <- both_categories[1]
    data_imp_features_clean_way$Way_of_measuring_predictive_value_categories[nrow(data_imp_features_clean_way)] <- both_categories[2]
  }
}
# Interactive bar chart of how predictive value was measured
plot_measure_imp <- ggplot2::ggplot(data = data_imp_features_clean_way,
                                    aes(x = `Way_of_measuring_predictive_value_categories`,
                                        color = `Study`)) +
  geom_bar() +
  theme(legend.position = "none") +
  xlab("") +
  coord_flip() +
  ggtitle("Ways to measure high predictive value")
library(plotly)
ggplotly(plot_measure_imp)
# Reduce to columns needed
data_imp_features_level_table <- data_imp_features_clean[,c("Study","Type_of_FC-based_input_features","Features_with_high_predictive_value","Resolution_of_reporting_features_with_high_predictive_value")]
# Change colnames
colnames(data_imp_features_level_table)[colnames(data_imp_features_level_table) == "Type_of_FC-based_input_features"] <- "FC-based input features"
# Turn underscores into spaces for display
colnames(data_imp_features_level_table) <- gsub("_"," ",colnames(data_imp_features_level_table))
# Remove unnecessary information: drop the "(wb: ...)" detail
data_imp_features_level_table$`FC-based input features` <- gsub("[(]wb:.*)","",data_imp_features_level_table$`FC-based input features`)
# Flextable
# BUG FIX: "paddings.right" → "padding.right" (typo; was silently ignored).
set_flextable_defaults(font.family = "Arial",
                       font.size = 8,
                       padding.bottom = 3,
                       padding.top = 3,
                       padding.left = 0.5,
                       padding.right = 0.5,
                       #theme_fun = "theme_apa",
                       theme_fun = NULL,
                       text.align = "center",
                       line_spacing = 1)
ft <- flextable(data_imp_features_level_table)
ft <- set_table_properties(ft, layout = "autofit")
ft <- autofit(ft)
flextable::save_as_docx(ft, path = "plots/RQ2.2_resolution.docx", pr_section = format_table_wide)
data_imp_features_level_table
# Get column names of the per-ROI indicator columns: "<ROI>_tested" marks that
# a study examined the region, "<ROI>_important" that it was predictive.
cols_tested <- colnames(data_imp_features_clean )[endsWith(colnames(data_imp_features_clean ),"_tested")]
cols_important <- colnames(data_imp_features_clean )[endsWith(colnames(data_imp_features_clean ),"_important")]
# First: Reshape tested-columns
data_imp_features_clean$ID = rownames(data_imp_features_clean)
data_ROIs_tested_long <- reshape(data = data_imp_features_clean,idvar = "Study", new.row.names = 1:20000,
varying = cols_tested, v.name = "tested", times = cols_tested, drop = cols_important, direction = "long")
colnames(data_ROIs_tested_long)[colnames(data_ROIs_tested_long)=="time"] <- 'ROI'
data_ROIs_tested_long$ROI <- gsub("_tested","",data_ROIs_tested_long$ROI)
# Build a study-x-ROI key so the two long tables can be matched below.
data_ROIs_tested_long$ID <- paste(data_ROIs_tested_long$`Study`,data_ROIs_tested_long$ROI)
# Second: Reshape important-columns
data_ROIs_important_long <- reshape(data = data_imp_features_clean , idvar = "ID",new.row.names = 1:20000,
varying = cols_important, v.name = "important", times = cols_important, direction = "long", drop = cols_tested)
data_ROIs_important_long$ROI <- gsub("_important","",data_ROIs_important_long$time)
data_ROIs_important_long$ID <- paste(data_ROIs_important_long$`Study`,data_ROIs_important_long$ROI)
# Third: Merge both data sets (merge() matches on all shared column names,
# including the ID key constructed above)
data_ROIs <- merge(data_ROIs_tested_long,data_ROIs_important_long)
data_ROIs$time <- gsub("_important","",data_ROIs$time)
colnames(data_ROIs)[colnames(data_ROIs)=="time"] <- 'Region'
# Exclude ROIs, when they were not tested
# NOTE(review): both real NA and the literal string "NA" are checked — confirm
# which one the sheet actually produces.
data_ROIs_only_tested <- data_ROIs[!c(data_ROIs$tested == "n"|is.na(data_ROIs$tested)|data_ROIs$tested == "NA"),]
# Convert to factor
data_ROIs_only_tested$important <- as.factor(data_ROIs_only_tested$important)
data_ROIs_only_tested$Region <-
gsub("_"," ",data_ROIs_only_tested$Region)
# Wide data set with absolute and relative freq per region
freq_all <- as.data.frame(table(data_ROIs_only_tested$Region))
data_ROIs_only_tested_important <- data_ROIs_only_tested[data_ROIs_only_tested$important=="y",]
data_ROIs_only_tested_nonimportant <- data_ROIs_only_tested[data_ROIs_only_tested$important=="n",]
freq_important <- as.data.frame(table(data_ROIs_only_tested_important$Region))
colnames(freq_important) <- c("Var1","Abs_frequency")
freq_nonimportant <- as.data.frame(table(data_ROIs_only_tested_nonimportant$Region))
colnames(freq_nonimportant) <- c("Var1","Freq_nonimportant")
freq_all <- merge(freq_all,freq_important,by= "Var1")
freq_all <- merge(freq_all,freq_nonimportant,by= "Var1")
# Relative frequency = important mentions / times tested, in %
freq_all$Rel_frequency <- round((freq_all$Abs_frequency/freq_all$Freq)*100)
# Get features with relative frequencies above 30% and order them according to frequency
temp_dataset <- freq_all[freq_all$Rel_frequency>30, c("Var1","Rel_frequency")]
temp_dataset <- arrange(temp_dataset,Rel_frequency)
Regions_high_freq <- temp_dataset$Var1
data_ROIs_only_tested$Region <- factor(data_ROIs_only_tested$Region, levels = Regions_high_freq)
This plot shows which regions' connectivities were particularly important for the prediction of TR.
# Reduce data set to only high frequency regions
data_ROIs_only_tested_high_freq <- data_ROIs_only_tested[data_ROIs_only_tested$Region %in% Regions_high_freq,]
# Stacked bar chart with percentage labels per region.
# BUG FIX: scale_fill_manual() has no `palette` argument — manual scales take
# their colours via `values=`; as written the plot errored when rendered.
plot_feat_pred_value <- ggplot2::ggplot(data = data_ROIs_only_tested_high_freq, aes(x =`Region`, fill= `important`))+
  geom_bar()+
  #theme(axis.text.x = element_text(angle = 90))+
  scale_fill_manual(values = colour_palette)+
  geom_text(aes(label = scales::percent(..count../tapply(..count.., ..x.. ,sum)[..x..])),stat = "count", position= position_stack(vjust = 0.5))+
  xlab("")+
  coord_flip()
#https://stackoverflow.com/questions/55680449/ggplot-filled-barplot-with-percentage-labels
# Change labels: show the y axis as percentages (bars are stacked fractions)
scaleFUN <- function(x) x*100
legend_predvalue_eng <- c("no predictive value", "predictive value")
# Plot: relative frequency of predictive vs non-predictive findings per region,
# annotated with absolute counts.
plot_feat_pred_value_2 <- ggplot(data = data_ROIs_only_tested_high_freq, aes(x =`Region`, fill= `important`))+
  geom_bar(aes (y = ..count../tapply(..count.., ..x.. ,sum)[..x..]))+
  #theme(axis.text.x = element_text(angle = 90))+
  scale_fill_brewer(palette = "Oranges", labels = legend_predvalue_eng)+
  scale_y_continuous(labels=scaleFUN)+
  geom_text(stat = "count", aes(label = paste0("n=",..count..)), position = position_fill(vjust =0.5))+
  xlab("")+
  ylab("Relative Frequency in %")+
  theme(legend.title=element_blank(),legend.position = "right") +
  #theme (plot.margin=unit (c (8,5.5,5.5,5.5),units = "pt"))+
  #scale_fill_manual()+
  coord_flip()
cairo_pdf("plots/figure_4_predictive_value.pdf", height = 4)
plot_feat_pred_value_2
dev.off()
## png
## 2
# Save plot
plot_feat_pred_value_2
ggsave("plots/figure_4_predictive_value.svg",plot_feat_pred_value_2,height = 4)
# Reduce to important columns
data_dimreduction_table <- data_dimreduction[, !(names(data_dimreduction) %in% c("Year", "Comments")), drop = FALSE]
colnames(data_dimreduction_table) <- gsub("_"," ",colnames(data_dimreduction_table))
# Flextable
# BUG FIX: "paddings.right" → "padding.right" (typo; was silently ignored).
set_flextable_defaults(font.family = "Arial",
                       font.size = 8,
                       padding.bottom = 3,
                       padding.top = 3,
                       padding.left = 0.5,
                       padding.right = 0.5,
                       #theme_fun = "theme_apa",
                       theme_fun = NULL,
                       text.align = "center",
                       line_spacing = 1)
ft <- flextable(data_dimreduction_table)
# Rotate the technique headers (columns 3-9) so the table stays narrow
ft <- rotate(ft, j = 3:9, rotation = "btlr", part = "header", align = "center")
ft <- height(ft, height = 1.5, part = "header")
ft <- hrule(ft, rule = "exact", part = "header")
ft <- set_table_properties(ft, layout = "autofit")
flextable::save_as_docx(ft, path = "plots/RQ3_table.docx", pr_section = format_table_portrait)
data_dimreduction_table
library(plotly)
# Reshape the dimension-reduction data to long format: one row per
# study x technique, with "applied" holding y/n.
cols_reduction <- c("atlas-based_parcellation", "data-driven_parcellation", "theory-based_selection")
cols_feat_extract <- setdiff(colnames(data_dimreduction),
                             c("Study", "Year",
                               "Approach_to_reduce_the_number_of_initially_available_connectivities",
                               "Comments", cols_reduction))
technique_cols <- c(cols_reduction, cols_feat_extract)
data_dimreduction_long <- reshape(data = data_dimreduction, idvar = "Study",
                                  varying = technique_cols, v.name = "applied",
                                  times = technique_cols, new.row.names = 1:1000,
                                  direction = "long")
# Plot for feature generation (parcellation / selection approaches)
applied_generation <- data_dimreduction_long[(data_dimreduction_long$applied == "y") &
                                               data_dimreduction_long$time %in% cols_reduction, ]
plot_reduction_generation <- ggplot(data = applied_generation, aes(x = `time`, color = `Study`)) +
  geom_bar() +
  coord_flip() +
  xlab("") +
  theme(legend.position = "none", plot.title = element_text(size = 14)) +
  ggtitle("Feature generation")
ggplotly(plot_reduction_generation)
# Plot for feature extraction
applied_extraction <- data_dimreduction_long[(data_dimreduction_long$applied == "y") &
                                               data_dimreduction_long$time %in% cols_feat_extract, ]
plot_reduction_extraction <- ggplot2::ggplot(data = applied_extraction, aes(x = `time`, color = `Study`)) +
  geom_bar() +
  theme(legend.position = "none", plot.title = element_text(size = 14)) +
  coord_flip() +
  xlab("") +
  ggtitle("Feature selection")
ggplotly(plot_reduction_extraction)
# Prepare dataset
# Remove columns that contain "comments"
cols_no_comments <- colnames(data_PROBAST)[!grepl("comments",colnames(data_PROBAST))]
data_PROBAST_table <- data_PROBAST[,cols_no_comments]
# Transpose table so studies become columns and PROBAST questions become rows
PROBAST_transposed <- t(data_PROBAST_table)
PROBAST_transposed <- as.data.frame(PROBAST_transposed)
colnames(PROBAST_transposed) <- PROBAST_transposed[1, ]
PROBAST_transposed <- PROBAST_transposed[-1, ]
# Use rownames as first column (as flextable ignores rownames)
PROBAST_transposed <- PROBAST_transposed %>%
  tibble::rownames_to_column(var = "Question")
# Flextable settings
# BUG FIX: "paddings.right" → "padding.right" (typo; was silently ignored).
set_flextable_defaults(font.family = "Arial",
                       font.size = 8,
                       padding.bottom = 3,
                       padding.top = 3,
                       padding.left = 0.5,
                       padding.right = 0.5,
                       #theme_fun = "theme_apa",
                       theme_fun = NULL,
                       text.align = "center",
                       line_spacing = 1)
# Create flextable; horizontal rules separate the four PROBAST domains
ft <- flextable(PROBAST_transposed)
ft <- set_table_properties(ft, width = 1, layout = "autofit")
ft <- autofit(ft)
ft <- hline(ft, part = "body", i = c(3, 7, 14, 24),
            border = fp_border(color = "black", width = 1))
flextable::save_as_docx(ft, path = "plots/PROBAST_table.docx", pr_section = format_table_wide)
# Final rating columns
cols_final_rating <- c("Final rating domain 1 (risk of bias)", "Final rating domain 2 (risk of bias)", "Final rating domain 3 (risk of bias)", "Final rating domain 4 (risk of bias)", "Final rating total (risk of bias)")
data_PROBAST_plot <- data_PROBAST[c("Study",cols_final_rating)]
# Bring data into long-format: one row per study x rating domain
data_PROBAST_plot_long <- reshape(data = data_PROBAST_plot,idvar = "Study", new.row.names = 1:20000,varying = cols_final_rating, v.name = "ROB", times = cols_final_rating, direction = "long")
colnames(data_PROBAST_plot_long)[colnames(data_PROBAST_plot_long)=="time"] <- 'Rating_domain'
# Order and rename factor rating_domain (English labels)
PROBAST_domains_eng <- c("ROB Sample","ROB Predictors", "ROB Outcome", "ROB Analysis", "ROB Total")
data_PROBAST_plot_long$Rating_domain <- factor(data_PROBAST_plot_long$Rating_domain,
levels = cols_final_rating, labels = PROBAST_domains_eng)
scaleFUN <- function(x) x*100
# Prepare final rating domains labels to make the total label bold
breaks <- levels(data_PROBAST_plot_long$Rating_domain)
labels <- as.expression(breaks)
labels[[5]] <- bquote(bold(.(labels[[5]])))
# Plot data
# BUG FIX: the alpha highlight compared against the old German label
# "ROB Gesamt", which never matches after relabelling to English, so the total
# bar was never emphasised; compare against "ROB Total". Also replaced the
# deprecated `guide = F` with `guide = "none"` and the redundant inline
# reassignment in `labels =` with the variable itself.
legend_PROBAST_eng <- c("high", "low")
plot_PROBAST <- ggplot(data = data_PROBAST_plot_long,aes(x = `Rating_domain`, fill = `ROB`))+ geom_bar(aes (alpha = Rating_domain == "ROB Total", y = ..count../tapply(..count.., ..x.. ,sum)[..x..]))+
  coord_flip()+
  scale_fill_manual(values = c("red3","chartreuse3"), name = "Risk of bias (ROB)", labels = legend_PROBAST_eng) +
  scale_y_continuous(labels=scaleFUN)+
  scale_alpha_manual(values = c("TRUE" = 1, "FALSE" = 0.6), guide = "none")+
  scale_x_discrete(label = labels, breaks = breaks)+
  theme(legend.position = "top")+
  xlab("")+
  ylab("Relative Frequency in %")
# Save plot
plot_PROBAST
ggsave("plots/figure_S1_PROBAST.svg",plot_PROBAST, height = 4.5, width = 9)